1. FUNCTION SMOOTHS THE DATA IN A WINDOW OF ONE-WEEK INTERVAL
Smooth.Case():
* Input: dataframe of observations the selected state’s date, cases
* Output: dataframe of observations with the state’s cases, smoothed cases, and date
library(dplyr) # for mutate(), rename() of columns
library(magrittr) # for pipe %>% operator
library(smoother) # for smth()
Smooth.Cases <- function(Cases) {
Cases %>%
arrange(Date) %>%
mutate(Cases_Smth=round(smth(Cases, window=7, tails=TRUE))) %>%
select(Date, Cases, Cases_Smth)
}
2. FUNCTION PLOTS THE ORIGINAL AND THE SMOOTHED DATA
library(plotly) # for interactive ggplotly()
## Warning: package 'plotly' was built under R version 3.6.2
Plot.Smth <- function(Smoothed_Cases) {
plot <- Smoothed_Cases %>% ggplot(aes(x=Date, y=Cases)) +
geom_line(linetype='dotted', color='#429890') +
geom_line(aes(y=Cases_Smth), color='#E95D0F') +
labs(title='Daily Confirmed Cases (Original & Smoothed)', x=NULL, y=NULL) +
theme(plot.title=element_text(hjust=0.5, color='steelblue'))
}
3. FUNCTION COMPUTES THE LOG-LIKELIHOOD
Gamma = 1/serial interval
The serial interval of COVID-19 is defined as the time duration between a primary case-patient (infector) having symptom onset and a secondary case-patient (infectee) having symptom onset.
The mean interval was 3.96 days according to CDC sources:
https://wwwnc.cdc.gov/eid/article/26/6/20-0357_article
Comp.Log_Likelihood()
* Input: dataframe of observations with the selected state’s date, cases, smoothed cases
* Output: dataframe of observations with the state’s cases, smoothed cases, Rt, Rt’s log-likelihood
library(purrr) # for map() and map2()
library(tidyr) # for unnest()
RT_MAX <- 10 # the max value of Effective Reproduction Rate Rt
# Generate a set of RT_MAX * 100 + 1 Effective Reproduction Rate value Rt
rt_set <- seq(0, RT_MAX, length=RT_MAX * 100 + 1)
GAMMA <- 1/4
Comp.Log_Likelihood <- function(Acc_Cases) {
likelihood <- Acc_Cases %>%
filter(Cases_Smth > 0) %>%
# Vectorize rt_set to form Rt column
mutate(Rt=list(rt_set),
# Compute lambda starting from the second to the last observation
Lambda=map(lag(Cases_Smth, 1), ~ .x * exp(GAMMA * (rt_set - 1))),
# Compute the log likelihood for every observation
Log_Likelihood=map2(Cases_Smth, Lambda, dpois, log=TRUE)) %>%
# Remove the first observation
slice(-1) %>%
# Remove Lambda column
select(-Lambda) %>%
# Flatten the table in columns Rt, Log_Likelihood
unnest(Log_Likelihood, Rt)
}
4. FUNCTION COMPUTES THE POSTERIOR OF THE EFFECTIVE REPRODUCTION RATE
Comp.Posterior()
* Input: dataframe of observations with the selected state’s date, cases, smoothed cases, Rt, Rt’s log-likelihood
* Output: dataframe of observations with the state’s cases, smoothed cases, Rt, Rt’s posterior
library(zoo) # for rollapplyr()
## Warning: package 'zoo' was built under R version 3.6.2
Comp.Posterior <- function(likelihood) {
likelihood %>%
arrange(Date) %>%
group_by(Rt) %>%
# Compute the posterior for every Rt by a sum of 7-day log-likelihood
mutate(Posterior=exp(rollapplyr(Log_Likelihood, 7, sum, partial=TRUE))) %>%
group_by(Date) %>%
# Normalize the posterior
mutate(Posterior=Posterior/sum(Posterior, na.rm=TRUE)) %>%
# Fill missing value of posterior with 0
mutate(Posterior=ifelse(is.nan(Posterior), 0, Posterior)) %>%
ungroup() %>%
# Remove Log_Likelihood column
select(-Log_Likelihood)
}
5. FUNCTION PLOTS POSTERIOR OF THE EFFECTIVE REPRODUCTION RATE
Plot.Posterior <- function(posteriors) {
posteriors %>% ggplot(aes(x=Rt, y=Posterior, group=Date)) +
geom_line(color='#E95D0F', alpha=0.4) +
labs(title='Daily Posterior of Rt', subtitle=state) +
coord_cartesian(xlim=c(0.2, 5)) +
theme(plot.title=element_text(hjust=0.5, color='steelblue'))
}
6. FUNCTION ESTIMATES THE EFFECTIVE REPRODUCTION RATE
Estimate.Rt()
* Input: csv dataframe of observations with the selected state’s cases, smoothed cases, Rt, Rt’s posterior
* Output: dataframe of observations with the state’s Rt, Rt_max, Rt_min
library(HDInterval)
Estimate.Rt <- function(posteriors) {
posteriors %>%
group_by(Date) %>%
summarize(Rts_sampled=list(sample(rt_set, 10000, replace=TRUE, prob=Posterior)),
Rt_MLL=rt_set[which.max(Posterior)]) %>%
mutate(Rt_MIN=map_dbl(Rts_sampled, ~ hdi(.x)[1]),
Rt_MAX=map_dbl(Rts_sampled, ~ hdi(.x)[2])) %>%
select(-Rts_sampled)
}
7. FUNCTION PLOTS THE EFFECTIVE REPRODUCTION RATE’S APPROXIMATION
Plot.Rt <- function(Rt_estimated) {
plot <- Rt_estimated %>%
ggplot(aes(x=Date, y=Rt_MLL)) +
geom_point(color='#429890', alpha=0.5, size=1) +
geom_line(color='#E95D0F') +
geom_hline(yintercept=1, linetype='dashed', color='red') +
geom_ribbon(aes(ymin=Rt_MIN, ymax=Rt_MAX), fill='black', alpha=0.5) +
labs(title='Estimated Effective Reproduction Rate Rt', x=NULL, y='Rt') +
coord_cartesian(ylim=c(0, 4)) +
theme(plot.title=element_text(hjust=0.5, color='steelblue'))
}
8. SET UP FOR COMPUTATIONS
# Read the local data file into dataframe
library(readr)
cv19 <- read_csv('./state.csv')
library(knitr)
library(kableExtra)
col <- c('Date','NY','CA','MI','LA','NH','IN','AR','ID','AZ','HI','AK','FL','TX')
kable(cv19[seq(0,80,5), col],
caption='Samples of Daily Counts of COVID-19 Cases in Some States') %>%
kable_styling(full_width=F, position='center',
bootstrap_options=c('striped', 'bordered', 'hover'))
| Date | NY | CA | MI | LA | NH | IN | AR | ID | AZ | HI | AK | FL | TX |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2020-01-30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-02-15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-02-28 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-03-04 | 9 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 2020-03-09 | 36 | 19 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
| 2020-03-14 | 192 | 39 | 8 | 41 | 0 | 4 | 0 | 4 | 3 | 2 | 0 | 39 | 12 |
| 2020-03-19 | 1770 | 77 | 254 | 112 | 5 | 23 | 29 | 12 | 17 | 10 | 3 | 104 | 60 |
| 2020-03-24 | 4790 | 369 | 463 | 216 | 7 | 106 | 21 | 23 | 92 | 13 | 6 | 185 | 58 |
| 2020-03-29 | 7195 | 0 | 836 | 225 | 44 | 282 | 17 | 49 | 146 | 24 | 12 | 912 | 500 |
| 2020-04-03 | 10482 | 1510 | 1953 | 1147 | 61 | 398 | 55 | 122 | 171 | 34 | 10 | 1260 | 661 |
| 2020-04-08 | 10453 | 1092 | 1376 | 746 | 41 | 436 | 3 | 22 | 151 | 25 | 13 | 1194 | 1091 |
| 2020-04-13 | 6337 | 554 | 997 | 421 | 35 | 308 | 130 | 27 | 163 | 5 | 5 | 1124 | 422 |
| 2020-04-18 | 7090 | 1435 | 768 | 462 | 55 | 487 | 44 | 13 | 212 | 21 | 5 | 739 | 889 |
| 2020-04-23 | 6244 | 1973 | 1325 | 481 | 82 | 601 | 207 | 34 | 310 | 4 | 2 | 1072 | 875 |
| 2020-04-28 | 3110 | 1567 | 1052 | 218 | 72 | 627 | 58 | 35 | 232 | 2 | 6 | 708 | 874 |
| 2020-05-03 | 3438 | 1419 | 547 | 200 | 89 | 638 | 59 | 0 | 276 | 0 | 3 | 615 | 1026 |
# Select a list of the U.S. states for modeling & computations
states <- col[2:5] # Select New York, California, Michigan, Louisiana
9. COMPUTE THEN PLOT THE ORIGINAL AND SMOOTHED CASES
df_cv19 <- list() # initialize list of plots for each of states
for (i in 1:length(states)) {
state <- states[i]
df_S <- cv19 %>% select(Date, state) %>% rename(Cases=state) %>% Smooth.Cases()
gplot <- df_S %>% Plot.Smth()
plot <- ggplotly(gplot) %>% add_annotations(text=state,font=list(size=14, color='#B51C35'),
xref='paper', yref='paper', x=0, y=0, showarrow=FALSE)
if (i == 1) {
plot <- plot %>% add_annotations(text='.... Original', font=list(size=14, color='#429890'),
xref='paper', yref='paper', x=0.2, y=0, showarrow=FALSE) %>%
add_annotations(text='--- Smoothed', font=list(size=14, color='#E95D0F'),
xref='paper', yref='paper', x=0.6, y=0, showarrow=FALSE)
}
df_cv19[[i]] <- plot
}
df_cv19 %>% subplot(nrows=length(states), shareX=TRUE)
10. PLOT THE POSTERIORS OF RT FOR EACH STATE IN THE LIST
df_cv19 <- list() # reset list of plots for each of states
for (i in 1:length(states)) {
state <- states[i]
df_P <- cv19 %>% select(Date, state) %>% rename(Cases=state) %>% Smooth.Cases %>%
Comp.Log_Likelihood() %>% Comp.Posterior()
gplot <- df_P %>% Plot.Posterior()
plot <- ggplotly(gplot) %>% add_annotations(text=state,
font=list(size=14, color='#B51C35'),
xref='paper', yref='paper', x=0, y=1, showarrow=FALSE)
df_cv19[[i]] <- plot
}
## Warning: unnest() has a new interface. See ?unnest for details.
## Try `df %>% unnest(c(Log_Likelihood, Rt))`, with `mutate()` if needed
## Warning: unnest() has a new interface. See ?unnest for details.
## Try `df %>% unnest(c(Log_Likelihood, Rt))`, with `mutate()` if needed
## Warning: unnest() has a new interface. See ?unnest for details.
## Try `df %>% unnest(c(Log_Likelihood, Rt))`, with `mutate()` if needed
## Warning: unnest() has a new interface. See ?unnest for details.
## Try `df %>% unnest(c(Log_Likelihood, Rt))`, with `mutate()` if needed
df_cv19 %>% subplot(nrows=length(states), shareX=TRUE)